product_monthly = read.csv("product_monthly.csv")
store_monthly = read.csv("store_monthly.csv")
store_prod_monthly = read.csv("store_prod_monthly.csv")
head(product_monthly)
##   X       date   brandy cocktails      gin   liqueur    other      rum schnapps
## 1 0 2012-01-31 40220.03  50637.75 37466.92  88247.04 12774.01 172926.8 72237.21
## 2 1 2012-02-29 43110.43  42954.46 41252.70 103028.37 13963.24 198711.0 72046.88
## 3 2 2012-03-31 41327.22  54192.36 44324.61 100299.67 16715.48 206779.6 72912.93
## 4 3 2012-04-30 43424.16 103627.89 50254.82 108193.01 12392.19 214852.2 73010.12
## 5 4 2012-05-31 45033.42  95183.20 57498.30 111795.26 15949.67 255571.5 75106.14
## 6 5 2012-06-30 41014.50 112738.67 54636.41 100194.82 30200.12 262501.3 68102.11
##    tequila    vodka   whisky   total
## 1 43175.16 357925.9 336471.7 1212082
## 2 50280.09 363623.7 457594.7 1386566
## 3 50674.13 399338.4 366308.6 1352873
## 4 67537.10 425101.6 401173.7 1499567
## 5 71564.38 481024.3 500706.4 1709433
## 6 70154.15 481137.8 397346.0 1618026
dim(product_monthly)
## [1] 104  13

Total volume trend

ts_total <- ts(product_monthly$total, start=c(2012, 1), end=c(2020, 8), frequency=12)

plot(ts_total, type = 'l')#, grid.ticks.on='M')

Total volume seasonal decomposition

stl_total = stl(ts_total, s.window = "periodic")
plot(stl_total, main = "Total Volume Seasonal Decomposition")

monthplot(ts_total)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

seasonplot(ts_total)

Auto arima forecast on total volume

ts_total_train = ts(product_monthly$total[1:78], start=c(2012, 1), end=c(2018, 6), frequency=12)
ts_total_test= ts(product_monthly$total[79:104], start=c(2018, 7), end=c(2020, 8), frequency=12)
fit_total = auto.arima(ts_total_train , test = "adf", trace=T)
## 
##  ARIMA(2,0,2)(1,1,1)[12] with drift         : 1729.126
##  ARIMA(0,0,0)(0,1,0)[12] with drift         : 1741.34
##  ARIMA(1,0,0)(1,1,0)[12] with drift         : 1739.343
##  ARIMA(0,0,1)(0,1,1)[12] with drift         : 1737.175
##  ARIMA(0,0,0)(0,1,0)[12]                    : 1745.553
##  ARIMA(2,0,2)(0,1,1)[12] with drift         : 1726.531
##  ARIMA(2,0,2)(0,1,0)[12] with drift         : 1724.026
##  ARIMA(2,0,2)(1,1,0)[12] with drift         : 1726.531
##  ARIMA(1,0,2)(0,1,0)[12] with drift         : 1734.561
##  ARIMA(2,0,1)(0,1,0)[12] with drift         : 1726.938
##  ARIMA(3,0,2)(0,1,0)[12] with drift         : 1726.414
##  ARIMA(2,0,3)(0,1,0)[12] with drift         : 1726.402
##  ARIMA(1,0,1)(0,1,0)[12] with drift         : 1737.817
##  ARIMA(1,0,3)(0,1,0)[12] with drift         : 1732.761
##  ARIMA(3,0,1)(0,1,0)[12] with drift         : 1726.334
##  ARIMA(3,0,3)(0,1,0)[12] with drift         : 1728.973
##  ARIMA(2,0,2)(0,1,0)[12]                    : 1742.246
## 
##  Best model: ARIMA(2,0,2)(0,1,0)[12] with drift

Forecast total volume

plot(forecast(fit_total, h=26))

plot(forecast(fit_total, h=26))

Evaluating Arima model on total volume

AR_total.mean <-forecast(fit_total,h=26)$mean
plot(ts_total_test, main="ARIMA Forecast vs. Actual on Total Volume 2018-07 - 2020-08", ylab="", xlab="Months", col="darkblue")  
lines(AR_total.mean, col="red")

fit_total_forecast <-forecast(fit_total,h=26)
accuracy(fit_total_forecast,ts_total_test)
##                      ME      RMSE      MAE         MPE     MAPE      MASE
## Training set   930.6258  93626.27 67558.65 -0.02416018 4.037865 0.6441391
## Test set     46322.1079 112030.70 99824.41  2.14575657 5.249035 0.9517775
##                     ACF1 Theil's U
## Training set -0.02552463        NA
## Test set     -0.18548215 0.3769757

Whisky trend

ts_whisky <- ts(product_monthly$whisky, start=c(2012, 1), end=c(2020, 8), frequency=12)
stl_whisky = stl(ts_whisky, s.window = "periodic")

plot(ts_whisky, type = 'l')#, grid.ticks.on='M')

Whisky Seasonal decomposition

stl_whisky = stl(ts_whisky, s.window = "periodic")
plot(stl_whisky)

ACF test

acf(ts_whisky, lag.max = 100)

pacf(ts_whisky, lag.max = 100)

Choosing the beginning 75% of the monthly data (78 months) as training, remaining 25% as test

ts_whisky_train = ts(product_monthly$whisky[1:78], start=c(2012, 1), end=c(2018, 6), frequency=12)
ts_whisky_test= ts(product_monthly$whisky[79:104], start=c(2018, 7), end=c(2020, 8), frequency=12)
fit1 = auto.arima(ts_whisky_train , test = "adf", trace=T)
## 
##  ARIMA(2,0,2)(1,1,1)[12] with drift         : Inf
##  ARIMA(0,0,0)(0,1,0)[12] with drift         : 1594.707
##  ARIMA(1,0,0)(1,1,0)[12] with drift         : 1590.308
##  ARIMA(0,0,1)(0,1,1)[12] with drift         : 1586.574
##  ARIMA(0,0,0)(0,1,0)[12]                    : 1607.394
##  ARIMA(0,0,1)(0,1,0)[12] with drift         : 1584.316
##  ARIMA(0,0,1)(1,1,0)[12] with drift         : 1586.575
##  ARIMA(0,0,1)(1,1,1)[12] with drift         : Inf
##  ARIMA(1,0,1)(0,1,0)[12] with drift         : 1586.574
##  ARIMA(0,0,2)(0,1,0)[12] with drift         : 1586.563
##  ARIMA(1,0,0)(0,1,0)[12] with drift         : 1588.04
##  ARIMA(1,0,2)(0,1,0)[12] with drift         : 1584.776
##  ARIMA(0,0,1)(0,1,0)[12]                    : 1609.084
## 
##  Best model: ARIMA(0,0,1)(0,1,0)[12] with drift

Auto Arima result with adf test

summary(fit1)
## Series: ts_whisky_train 
## ARIMA(0,0,1)(0,1,0)[12] with drift 
## 
## Coefficients:
##           ma1      drift
##       -0.4625  1720.8968
## s.e.   0.1116   209.7703
## 
## sigma^2 estimated as 1.454e+09:  log likelihood=-788.96
## AIC=1583.93   AICc=1584.32   BIC=1590.5
## 
## Training set error measures:
##                    ME     RMSE      MAE          MPE     MAPE      MASE
## Training set 176.0878 34535.17 26178.48 2.781573e-05 5.035107 0.6908371
##                      ACF1
## Training set 0.0001768834
checkresiduals(fit1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,0,1)(0,1,0)[12] with drift
## Q* = 41.823, df = 14, p-value = 0.0001319
## 
## Model df: 2.   Total lags used: 16
qqnorm(fit1$residuals)

Forecast fit1

plot(forecast(fit1, h=26))

# plot of the test set and its prediction only

AR.mean <-forecast(fit1,h=26)$mean
plot(ts_whisky_test, main="ARIMA Forecast on Whisky Volume 2018-07 - 2020-08", ylab="", xlab="Months", col="darkblue")  
lines(AR.mean, col="red")

fit1_forecast <-forecast(fit1,h=26)
accuracy(fit1_forecast,ts_whisky_test)
##                     ME     RMSE      MAE          MPE     MAPE      MASE
## Training set  176.0878 34535.17 26178.48 2.781573e-05 5.035107 0.6908371
## Test set     3455.0143 39628.28 31624.94 1.012261e-01 5.257158 0.8345665
##                       ACF1 Theil's U
## Training set  0.0001768834        NA
## Test set     -0.4083636345 0.3214094

Supermarket - Whisky

ts_sw <- ts(store_prod_monthly$Supermarketwhisky, start=c(2012, 1), end=c(2020, 8), frequency=12)
stl_sw = stl(ts_sw, s.window = "periodic")
plot(ts_sw, type = 'l')#, grid.ticks.on='M')

plot(stl_sw)

ts_sw_train = ts(store_prod_monthly$Supermarketwhisky[1:78], start=c(2012, 1), end=c(2018, 6), frequency=12)
ts_sw_test= ts(store_prod_monthly$Supermarketwhisky[79:104], start=c(2018, 7), end=c(2020, 8), frequency=12)
fit_sw = auto.arima(ts_sw_train, test = "adf", trace=T)
## 
##  ARIMA(2,0,2)(1,1,1)[12] with drift         : 1591.086
##  ARIMA(0,0,0)(0,1,0)[12] with drift         : 1605.265
##  ARIMA(1,0,0)(1,1,0)[12] with drift         : 1595.382
##  ARIMA(0,0,1)(0,1,1)[12] with drift         : 1594.662
##  ARIMA(0,0,0)(0,1,0)[12]                    : 1609.201
##  ARIMA(2,0,2)(0,1,1)[12] with drift         : 1588.656
##  ARIMA(2,0,2)(0,1,0)[12] with drift         : 1588.285
##  ARIMA(2,0,2)(1,1,0)[12] with drift         : 1588.596
##  ARIMA(1,0,2)(0,1,0)[12] with drift         : 1601.319
##  ARIMA(2,0,1)(0,1,0)[12] with drift         : 1591.127
##  ARIMA(3,0,2)(0,1,0)[12] with drift         : 1590.679
##  ARIMA(2,0,3)(0,1,0)[12] with drift         : 1590.668
##  ARIMA(1,0,1)(0,1,0)[12] with drift         : 1599.41
##  ARIMA(1,0,3)(0,1,0)[12] with drift         : 1597.111
##  ARIMA(3,0,1)(0,1,0)[12] with drift         : 1590.966
##  ARIMA(3,0,3)(0,1,0)[12] with drift         : Inf
##  ARIMA(2,0,2)(0,1,0)[12]                    : 1597.496
## 
##  Best model: ARIMA(2,0,2)(0,1,0)[12] with drift
summary(fit_sw)
## Series: ts_sw_train 
## ARIMA(2,0,2)(0,1,0)[12] with drift 
## 
## Coefficients:
##           ar1      ar2     ma1     ma2      drift
##       -1.2768  -0.8669  1.0659  0.5436  1156.0819
## s.e.   0.0974   0.0926  0.1814  0.1723   311.3059
## 
## sigma^2 estimated as 1.44e+09:  log likelihood=-787.43
## AIC=1586.86   AICc=1588.28   BIC=1600
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 77.80647 33561.16 24946.52 -0.152588 6.928728 0.6579856
##                     ACF1
## Training set -0.02039364
checkresiduals(fit_sw)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,2)(0,1,0)[12] with drift
## Q* = 9.0383, df = 11, p-value = 0.6184
## 
## Model df: 5.   Total lags used: 16
qqnorm(fit_sw$residuals)

plot(forecast(fit_sw, h=30))

forecast(fit_sw, h=30)
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Jul 2018       355061.8 306426.1 403697.5 280679.9 429443.7
## Aug 2018       353555.8 303850.7 403261.0 277538.3 429573.3
## Sep 2018       396507.8 346733.2 446282.5 320384.1 472631.6
## Oct 2018       547488.0 496228.9 598747.1 469094.0 625882.1
## Nov 2018       366338.0 313367.1 419308.8 285326.1 447349.9
## Dec 2018       536970.7 483610.2 590331.2 455362.9 618578.6
## Jan 2019       295485.2 242019.0 348951.5 213715.6 377254.8
## Feb 2019       410193.8 355823.9 464563.7 327042.2 493345.4
## Mar 2019       337139.2 281912.6 392365.8 252677.4 421601.0
## Apr 2019       341960.9 286602.7 397319.0 257297.9 426623.8
## May 2019       473199.7 417729.1 528670.3 388364.7 558034.7
## Jun 2019       392484.8 336466.5 448503.2 306812.1 478157.5
## Jul 2019       363030.8 293183.9 432877.8 256209.1 469852.5
## Aug 2019       374363.1 304036.5 444689.7 266807.8 481918.4
## Sep 2019       406645.7 336315.2 476976.2 299084.4 514207.0
## Oct 2019       560118.4 489518.6 630718.2 452145.3 668091.5
## Nov 2019       385035.5 313924.7 456146.2 276280.9 493790.0
## Dec 2019       545761.2 474436.3 617086.2 436679.1 654843.3
## Jan 2020       311664.8 240339.1 382990.5 202581.6 420748.1
## Feb 2020       425527.8 354012.6 497043.0 316154.7 534900.9
## Mar 2020       347147.1 275349.5 418944.7 237342.2 456952.0
## Apr 2020       359502.0 287613.6 431390.3 249558.3 469445.7
## May 2020       485740.1 413846.0 557634.1 375787.6 595692.5
## Jun 2020       404879.3 332857.7 476900.9 294731.8 515026.8
## Jul 2020       379946.8 295575.0 464318.6 250911.2 508982.3
## Aug 2020       385632.7 300883.8 470381.6 256020.5 515244.9
## Sep 2020       421204.6 336443.1 505966.1 291573.0 550836.1
## Oct 2020       575372.6 490156.4 660588.8 445045.7 705699.5
## Nov 2020       396550.4 310754.6 482346.1 265337.1 527763.6
## Dec 2020       561447.5 475500.9 647394.1 430003.5 692891.4
575372.6 / 360
## [1] 1598.257
AR.mean.sw <-forecast(fit_sw,h=26)$mean
plot(ts_sw_test, main="ARIMA Forecast on Supermarket Whisky Volume 2018-07 - 2020-08", ylab="", xlab="Months", col="darkblue")  
lines(AR.mean.sw, col="red")

fit_sw_forecast <-forecast(fit_sw,h=26)
accuracy(fit_sw_forecast,ts_sw_test)
##                      ME     RMSE      MAE        MPE     MAPE      MASE
## Training set   77.80647 33561.16 24946.52 -0.1525880 6.928728 0.6579856
## Test set     4764.31987 41595.13 31862.75  0.2200031 7.694372 0.8404070
##                     ACF1 Theil's U
## Training set -0.02039364        NA
## Test set     -0.33060812 0.3361376

Supermarket + Vodka

ts_sv <- ts(store_prod_monthly$Supermarketvodka, start=c(2012, 1), end=c(2020, 8), frequency=12)
ts_sv_train = ts(store_prod_monthly$Supermarketvodka[1:78], start=c(2012, 1), end=c(2018, 6), frequency=12)
ts_sv_test= ts(store_prod_monthly$Supermarketvodka[79:104], start=c(2018, 7), end=c(2020, 8), frequency=12)
fit_sv = auto.arima(ts_sv_train, test = "adf", trace=T)
## 
##  ARIMA(2,0,2)(1,1,1)[12] with drift         : Inf
##  ARIMA(0,0,0)(0,1,0)[12] with drift         : 1589.069
##  ARIMA(1,0,0)(1,1,0)[12] with drift         : 1589.727
##  ARIMA(0,0,1)(0,1,1)[12] with drift         : 1588.185
##  ARIMA(0,0,0)(0,1,0)[12]                    : 1595.849
##  ARIMA(0,0,1)(0,1,0)[12] with drift         : 1586.219
##  ARIMA(0,0,1)(1,1,0)[12] with drift         : 1588.244
##  ARIMA(0,0,1)(1,1,1)[12] with drift         : Inf
##  ARIMA(1,0,1)(0,1,0)[12] with drift         : 1588.045
##  ARIMA(0,0,2)(0,1,0)[12] with drift         : 1587.654
##  ARIMA(1,0,0)(0,1,0)[12] with drift         : 1587.735
##  ARIMA(1,0,2)(0,1,0)[12] with drift         : 1589.044
##  ARIMA(0,0,1)(0,1,0)[12]                    : 1597.591
## 
##  Best model: ARIMA(0,0,1)(0,1,0)[12] with drift
summary(fit_sv)
## Series: ts_sv_train 
## ARIMA(0,0,1)(0,1,0)[12] with drift 
## 
## Coefficients:
##           ma1     drift
##       -0.3106  1253.684
## s.e.   0.1266   271.503
## 
## sigma^2 estimated as 1.499e+09:  log likelihood=-789.92
## AIC=1585.83   AICc=1586.22   BIC=1592.4
## 
## Training set error measures:
##                    ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 100.0396 35073.36 26151.97 -0.7145016 7.969356 0.7530406
##                    ACF1
## Training set 0.02548068
checkresiduals(fit_sv)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,0,1)(0,1,0)[12] with drift
## Q* = 21.795, df = 14, p-value = 0.08292
## 
## Model df: 2.   Total lags used: 16
qqnorm(fit_sv$residuals)

AR.mean.sv <-forecast(fit_sv,h=26)$mean
plot(ts_sv_test, main="ARIMA Forecast on Supermarket Whisky Volume 2018-07 - 2020-08", ylab="", xlab="Months", col="darkblue")  
lines(AR.mean.sv, col="red")

fit_sv_forecast <-forecast(fit_sv,h=26)
accuracy(fit_sv_forecast,ts_sv_test)
##                     ME     RMSE      MAE        MPE     MAPE      MASE
## Training set  100.0396 35073.36 26151.97 -0.7145016 7.969356 0.7530406
## Test set     9476.5216 41410.12 33582.01  1.7643123 8.137148 0.9669871
##                    ACF1 Theil's U
## Training set 0.02548068        NA
## Test set     0.04463531 0.6372044

Store type

head(store_monthly)
##   X       date     X0.0  Casino Convenience.Store Drug.Store
## 1 0 2012-01-31 51305.81 5500.00          110256.2   112604.0
## 2 1 2012-02-29 55205.01 4486.75          121661.6   112396.2
## 3 2 2012-03-31 54217.05 4727.00          127321.0   112013.1
## 4 3 2012-04-30 57251.25 3896.00          132903.4   121393.0
## 5 4 2012-05-31 71418.98 4567.00          155873.8   154428.3
## 6 5 2012-06-30 65401.16 4155.00          155662.9   121631.2
##   Liquor.Tobacco.Store Other.Grocery.or.Convenience Supermarket   total
## 1             570069.3                     70543.85    743882.7 1664162
## 2             629843.3                     71590.78    937311.4 1932495
## 3             634360.6                     66930.42    851741.2 1851310
## 4             677777.4                     73353.89   1028583.7 2095159
## 5             754024.6                    100358.39   1115182.1 2355853
## 6             649632.1                     85461.18   1204114.5 2286058
ts_supermarket = ts(store_monthly$Supermarket, start=c(2012, 1), end=c(2020, 8), frequency=12)
ts_conv = ts(store_monthly$Convenience.Store, start=c(2012, 1), end=c(2020, 8), frequency=12)
ts_liquor = ts(store_monthly$Liquor.Tobacco.Store, start=c(2012, 1), end=c(2020, 8), frequency=12)
plot(ts_supermarket, type = "l")

plot(ts_conv, type = "l")

plot(ts_liquor, type = "l")